# Script to run QAP
library(sna)
library(doParallel)
library(doRNG)

# Load the followers adjacency matrix
y <- readRDS("processed_data/retweets_adjacencyMatrix.Rds")
# Load the predicting matrix 
load("processed_data/QAP_predicting_matrices.RData")

diag(y) <- NA

y_v <- c(y)


Var.names <-c("State_Similarity",
              "Party_Similarity",
              "Chamber_Similarity",
              "Gender_Similarity",
              "Race_Similarity",
              "Difference_in_Legislatures_Profeshionalism",
              "Dem_Sender_Effect",
              "Rep_Sender_Effect",
              "House_Sender_Effect",
              "Female_Sender_Effect",
              "Profesh_Sender_Effect",
              "Black_Sender_Effect",
              "Latino_Sender_Effect",
              "Asian_Sender_Effect",
              "Mena_Sender_Effect",
              "Multi_Sender_Effect",
              "Native_Sender_Effect",
              "Democrat_Receiver_Effect",
              "Republican_Receiver_Effect",
              "House_Receiver_Effect",
              "Female_Receiver_Effect",
              "Profesh_Receiver_Effect",
              "Black_Receiver_Effect",
              "Latino_Receiver_Effect",
              "Asian_Receiver_Effect",
              "Mena_Receiver_Effect",
              "Multi_Receiver_Effect",
              "Native_Receiver_Effect",
              "Same_Party_Same_State",
              "Same_Chamber_Same_State",
              "Same_Gender_Same_State",
              "Same_Race_Same_State",
              "Contiguous_States")

for(i in 1:length(Var.names)){
  xi <- predicting_matrices[i,,]
  diag(xi) <- NA
  print(sum(is.na(xi)))
  if(i == 1){
    xdf <- cbind(c(xi))
  }
  if(i > 1){
    xdf <- cbind(xdf,cbind(c(xi)))
  }
}

send_id <- matrix(1:nrow(y),nrow(y),nrow(y))
rec_id <- t(send_id)
diag(send_id) <- NA
diag(rec_id) <- NA

xdf <- cbind(xdf,c(send_id),c(rec_id))

xdf <- data.frame(xdf)

names(xdf) <- c(Var.names,c("send_id","rec_id"))

xydf <- data.frame(y_v,xdf)

xynd <- na.omit(xydf)

covs <- paste(Var.names,collapse="+")

names(xynd)[1] <- "yij"

library(fastglm)
system.time(est0 <- fastglm(as.matrix(xynd[,Var.names]),xynd$yij,family=gaussian(),method=3))



load("simulated_retweet_networks.RData")
rm(xdf)
gc()
rm(predicting_matrices)
gc()
rm(xynd)
gc()

library(biglm)

estim <- NULL
twop <- NULL
for(i in 1:50){
  
  # qap unpermuted
  ys <- sim_amats[[i]]
  
  diag(ys) <- NA
  
  y_vs <- c(y)
  
  xydf$y_vs <- y_vs
  
  covs <- paste(Var.names,collapse="+")
  
  form <- as.formula(paste("y_vs~",covs,sep=""))
  
  system.time(est_true <-  biglm(form,data=xydf))
  
  filenm =paste("./qap_power_results/qap_power_results/qap_sim_retweet",i,".RData",sep="")
  load(filenm)
  estim <- cbind(estim,coef(est_true))
  
  obs_ts <- summary(est_true)$mat[,1]/summary(est_true)$mat[,4]
  
  pgabs <- NULL
  for(j in 1:length(coef(est_true))){
    pgabs <- c(pgabs,mean(abs(perm_coef[,j]) > abs(obs_ts[j])))
  }
  
  twop <- cbind(twop,pgabs)
  
  print(i)
}

psig <- NULL
for(i in 1:length(coef(est0))){
  same_sign <- sign(estim[i+1,]) == sign(coef(est0))[i]
  sig <- mean((twop[i+1,] <= 0.05)*same_sign)
  psig <- c(psig,sig)
}

# calculate standardized coefficients
abs_std_coef <- coef(est0)/est0$se
abs_std_coef <- abs(abs_std_coef)

names(coef(est0))[abs_std_coef > 2 & psig < 0.8]

library(ggplot2)
# Basic scatter plot
df <- data.frame(power = psig,zstat = abs_std_coef,
                 col = ifelse(abs_std_coef > 2 & psig < 0.8,"red","blue"),
                 shp = ifelse(abs_std_coef > 2 & psig < 0.8,17,19))

ggplot(df, aes(x=zstat, y=power)) +
  geom_point(size=2,shape =df$shp,colour=df$col) + 
  scale_x_continuous(trans='log2') +
  geom_hline(yintercept=0.8) +
  geom_vline(xintercept=2) +
  theme_bw()

ggsave("power_results_retweet.pdf",width=4,height=3.5)
underpowered <- names(coef(est0))[abs_std_coef > 2 & psig < 0.8]
write.csv(underpowered,file="underpowered_retweet.csv")

